Prior to running this code:

# Set Working Directory before starting

# Immcantation
suppressPackageStartupMessages(library(alakazam))
suppressPackageStartupMessages(library(shazam))
suppressPackageStartupMessages(library(tigger))
suppressPackageStartupMessages(library(dowser))

# scRepertoire
suppressPackageStartupMessages(library(scRepertoire))

suppressPackageStartupMessages(library(glmGamPoi))
suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(dittoSeq))
suppressPackageStartupMessages(library(reshape2))
suppressPackageStartupMessages(library(kableExtra))
suppressPackageStartupMessages(library(devtools))
suppressPackageStartupMessages(library(data.table))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(ggpubr))
suppressPackageStartupMessages(library(RColorBrewer))
suppressPackageStartupMessages(library(gridExtra))
suppressPackageStartupMessages(library(cowplot))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(impactSingleCellToolkit))

# Input files
processed_objects_fh <- "objects/processed_objects.RData"
imm_heavy_results_fh <- "data/results_immcantation/BCR_CSFPB_heavy_germ-pass.tsv"
input_bcr_fh         <- "data/input_immcantation/input_bcr_filtered_contig_annotations.csv"
input_tcr_fh         <- "data/input_immcantation/input_tcr_filtered_contig_annotations.csv"

seed.number <- 123456

source("resources/functions_workshop.R")

Load Processed Objects

1. load processed dataframes and objects

load(processed_objects_fh)

# Add unique_cell_id column to seurat objects
bcell_obj$unique_cell_id <- row.names(bcell_obj@meta.data)

# Remove cells with no annotations (2 cells)
tcr_df <- tcr_df[! tcr_df$SUBTYPE_ANNOTATION %in% "No Annot",]
bcr_df <- bcr_df[! bcr_df$SUBTYPE_ANNOTATION %in% "No Annot",]

# Simplify V/J genes
tcr_df$v_call <- tstrsplit(tcr_df$v_call,"\\*")[[1]]
tcr_df$j_call <- tstrsplit(tcr_df$j_call,"\\*")[[1]]
bcr_df$v_call <- tstrsplit(bcr_df$v_call,"\\*")[[1]]
bcr_df$j_call <- tstrsplit(bcr_df$j_call,"\\*")[[1]]

# Add in chain columns
bcr_df$chain <- bcr_df$locus
tcr_df$chain <- tcr_df$locus

# For B cells, IgSeq analyses are done on the Heavy chain only
bcr_results_igh <- bcr_df[bcr_df$locus %in% "IGH",]

2. input BCR - scRepertoire

input_bcr_df <- read.csv(input_bcr_fh,stringsAsFactors = F)

# unique_cell_id
barcode_vec <- tstrsplit(input_bcr_df$barcode,"-")[[1]]
sample_vec  <- tstrsplit(input_bcr_df$contig_id,":")[[1]]
input_bcr_df$unique_cell_id <- paste(barcode_vec,sample_vec,sep = "-")

# Overlap with Seurat Object
input_bcr_df <- input_bcr_df[input_bcr_df$unique_cell_id %in% colnames(bcell_obj),]
input_bcr_df <- input_bcr_df[input_bcr_df$contig_id %in% bcr_df$sequence_id,]
input_bcr_df$sample <- tstrsplit(input_bcr_df$unique_cell_id,"-")[[2]]

# Create scRepertoire dataframe list
bcr.list    <- split(input_bcr_df,input_bcr_df$sample)
uni_samples <- unique(input_bcr_df$sample)

bcell_screp <- combineBCR(bcr.list, 
                        samples = uni_samples)

3. input TCR - scRepertoire

# Load one contig file
input_tcr_df <- read.csv(input_tcr_fh,stringsAsFactors = F)

# unique_cell_id
barcode_vec <- tstrsplit(input_tcr_df$barcode,"-")[[1]]
sample_vec  <- tstrsplit(input_tcr_df$contig_id,":")[[1]]
input_tcr_df$unique_cell_id <- paste(barcode_vec,sample_vec,sep = "-")

# Overlap with Seurat Object
input_tcr_df <- input_tcr_df[input_tcr_df$unique_cell_id %in% colnames(tcell_obj),]
input_tcr_df <- input_tcr_df[input_tcr_df$contig_id %in% tcr_df$sequence_id,]
input_tcr_df$sample <- tstrsplit(input_tcr_df$unique_cell_id,"-")[[2]]

# Create scRepertoire dataframe list
tcr.list    <- split(input_tcr_df,input_tcr_df$sample)
uni_samples <- unique(input_tcr_df$sample)

tcell_screp <- combineTCR(tcr.list, 
                        samples = uni_samples)

—————————–

scRepertoire Analysis

1. Quantify Clones

# fig.width=9 | fig.width=5 for workshop
p_scr1 <- quantContig(bcell_screp, 
            cloneCall="strict", 
            chain = "IGH", 
            scale = FALSE)

p_scr2 <- quantContig(tcell_screp, 
            cloneCall="strict", 
            chain = "both", 
            scale = FALSE)

plot_grid(p_scr1,p_scr2,ncol = 2)

2. Clonotype Proportions

a. relative abundance

# fig.height=4,fig.width=12 |fig.height=3,fig.width=6 for workshop
p_scr3 <- clonalHomeostasis(bcell_screp, cloneCall = "strict",chain = "IGH")
p_scr4 <- clonalHomeostasis(tcell_screp, cloneCall = "strict",chain = "both")

plot_grid(p_scr3,p_scr4,ncol = 2)

b. proportion

  • clonotypes are sorted by abundance and binned in decending order from most abundance clonotypes ([1:10]) to the least
  • changing to ‘aa’ cloneCall criteria leads to more total B cell clonotypes in PBMC
  • B cell repertoire contains fewer total clonotypes compared to T cells
# fig.width=12 | fig.width=6 for workshop
p_scr5 <- clonalProportion(bcell_screp, chain = "IGH",cloneCall = "strict") 
p_scr6 <- clonalProportion(tcell_screp, chain = "both",cloneCall = "strict") 

plot_grid(p_scr5,p_scr6,ncol = 2)

3. Gene usage

a. BCR gene usage

  • CSF contains more restricted gene expression especially for V genes
vizGenes(bcell_screp, gene = "V", 
         chain = "IGH", 
         plot = "bar", 
         scale = TRUE)

b. TCR gene usage

vizGenes(tcell_screp, gene = "V", 
         chain = "TRB", 
         plot = "bar", 
         scale = TRUE)

3. CDR3 Lengths

p_scr5 <- lengthContig(bcell_screp, 
             cloneCall="aa", 
             chain = "IGH") 

p_scr6 <- lengthContig(tcell_screp, 
             cloneCall="aa", 
             chain = "TRB") 

plot_grid(p_scr5,p_scr6,ncol = 2)

4. Shared clonotypes

a. TCR shared clones

# Disease Patient
p_scr7 <- compareClonotypes(tcell_screp, 
                  numbers = 10,
                  chain = "both",
                  samples = c("28_CSF", "28_PBMC"), 
                  cloneCall="aa", 
                  graph = "alluvial")
# Healthy Control
p_scr8 <- compareClonotypes(tcell_screp, 
                  numbers = 10,
                  chain = "both",
                  samples = c("32_CSF", "32_PBMC"), 
                  cloneCall="aa", 
                  graph = "alluvial")

plot_grid(p_scr7,p_scr8)

b. BCR shared clonotypes

  • Ig Clonotypes do not overlap between compartments across the most abundant clonotypes
  • ‘aa’ criteria B cell clonotypes are strict and doesn’t account for somatic hypermutation
  • When defining clonotypes based on VDJ and CDR3 nucleotide sequences, more overlaps appear
  • Since the CSF repertoire is smaller, when we set numbers = NULL, we can still visualize all the shared clonotype connections
  • The Disease patient contains more shared clonotypes between PBMC and CSF
# Disease Patient
p_scr9 <- compareClonotypes(bcell_screp, 
                  chain = "IGH",
                  numbers = NULL,
                  samples = c("28_CSF", "28_PBMC"), 
                  cloneCall="strict", 
                  graph = "alluvial")
# Healthy Control
p_scr10 <- compareClonotypes(bcell_screp, 
                  chain = "IGH",
                  numbers = NULL,
                  samples = c("32_CSF", "32_PBMC"), 
                  cloneCall="strict", 
                  graph = "alluvial")

plot_grid(p_scr9,p_scr10)

5. Shared clonotype scatter plots - TCR

# fig.width=11 | fig.width=8 for workshop
p_scr11 <- scatterClonotype(tcell_screp, cloneCall ="aa", 
                 x.axis = "28_CSF", 
                 y.axis = "28_PBMC",
                 dot.size = "total",
                 graph = "proportion")

p_scr12 <- scatterClonotype(tcell_screp, cloneCall ="aa", 
                 x.axis = "32_CSF", 
                 y.axis = "32_PBMC",
                 dot.size = "total",
                 graph = "proportion")

plot_grid(p_scr11,p_scr12,ncol = 2)

6. Shared clonotypes - proportions

method  <- "strict"
p_scr13 <- clonalOverlap(bcell_screp, cloneCall=method,chain = "IGH", method="overlap")
p_scr14 <- clonalOverlap(tcell_screp, cloneCall=method,chain = "both", method="overlap")


plot_grid(p_scr13,p_scr14,ncol = 2)

7. Clonal diversity measures

a. BCR

# BCR
clonalDiversity(bcell_screp, 
                cloneCall = "strict", 
                chain = "IGH",
                group.by = "sample", 
                n.boots = 100)

a. TCR

# TCR
clonalDiversity(tcell_screp, 
                cloneCall = "aa", 
                chain = "both",
                group.by = "sample", 
                n.boots = 100)

—————————–

Immcantation Post Analysis

https://bitbucket.org/kleinstein/immcantation/src/master/training/10x_tutorial.md

1. CDR3 AA Hamming Distances across BCR data

dist_nearest <- distToNearest(bcr_results_igh)

# Automated Threshold - takes too long to run
# - Threshold setting occurs by default when running the immcantation pipeline
#threshold_output <- shazam::findThreshold(dist_nearest$dist_nearest,
#                                  method = "gmm", model = "gamma-norm",
#                                  cutoff = "user",spc = 0.99)
#threshold <- threshold_output@threshold

# generate Hamming distance histogram
p1 <- ggplot2::ggplot(subset(dist_nearest, !is.na(dist_nearest)),
             aes(x = dist_nearest)) +
        geom_histogram(color = "white", binwidth = 0.02) +
        labs(x = "Hamming distance", y = "Count") +
        scale_x_continuous(breaks = seq(0, 1, 0.1)) +
        theme_bw() +
        theme(axis.title = element_text(size = 18)) + geom_vline(xintercept=0.3, linetype = "longdash",color = "red")
plot(p1)

2. Rank diversity

https://alakazam.readthedocs.io/en/stable/vignettes/Diversity-Vignette/

-color themes

comp_colors <- c("CSF"="blue", "PB"="tomato2")
t_annot_colors <- c("CD4 T cells"="seagreen", "CD8 T cells"="steelblue")
b_annot_colors <- c("Memory B-cells"="seagreen", "naive B-cells"="steelblue","Plasma cells"="tomato2")

status_colors   <- c("Healthy"="seagreen", "Disease"="steelblue")
b_status_colors <- c("Disease"="steelblue")

a. T/B cells CSF vs. PB

  • Disease and Healthy patients
set.seed(seed.number)

# T cells
rank_curve_loc  <- estimateAbundance(tcr_df, group="COMPARTMENT", ci=0.95, nboot=200)
r1<-plot(rank_curve_loc,main_title="T cell Rank diversity CSF and PB",colors=comp_colors)

# B cells
rank_curve_loc  <- estimateAbundance(bcr_results_igh, group="COMPARTMENT", ci=0.95, nboot=200,min_n = 7)
r2<-plot(rank_curve_loc,main_title="B cell Rank diversity CSF and PB",color=comp_colors)

# Plot both curves in the same plot
panel_rank_curves <- plot_grid(r1,r2,ncol = 2)
panel_rank_curves

b. T/B cells annotated subtypes

  • Disease and Healthy patients
set.seed(seed.number)

# T cells
rank_curve_annot  <- estimateAbundance(tcr_df, group="SUBTYPE_ANNOTATION", ci=0.95, nboot=200)
r3<-plot(rank_curve_annot,main_title="Rank diversity T cell Annotation",color=t_annot_colors)

# B cells
rank_curve_annot  <- estimateAbundance(bcr_results_igh, group="SUBTYPE_ANNOTATION", ci=0.95, nboot=200,min_n = 7)
r4<-plot(rank_curve_annot,main_title="Rank diversity B cell Annotation",colors=b_annot_colors)

panel2_rank_curves <- plot_grid(r3,r4,ncol = 2)
panel2_rank_curves

c. Disease vs. Healthy

set.seed(seed.number)

rank_curve_annot  <- estimateAbundance(tcr_df, group="STATUS", ci=0.95, nboot=200)
r5<-plot(rank_curve_annot,main_title="Rank diversity T cell Status",colors=status_colors)

rank_curve_annot  <- estimateAbundance(bcr_results_igh, group="STATUS", ci=0.95, nboot=200,min_n = 8)
r6<-plot(rank_curve_annot,main_title="Rank diversity B cell Status",colors=status_colors)

# Plot both curves in the same plot
panel3_rank_curves <- plot_grid(r5,r6,ncol = 2)
panel3_rank_curves

d. Disease vs. Healthy - CSF

  • There are no BCRs in the CSF of the healthy patient
set.seed(seed.number)

# Subset CSF 
tcr_results_csf     <- tcr_df[tcr_df$COMPARTMENT %in% "CSF",]
bcr_results_igh_csf <- bcr_results_igh[bcr_results_igh$COMPARTMENT %in% "CSF",]

# TCR CSF
rank_curve_annot  <- estimateAbundance(tcr_results_csf, group="STATUS", ci=0.95, nboot=200)
r7<-plot(rank_curve_annot,main_title="Rank diversity T cell Status CSF",colors=status_colors)

# BCR CSF
rank_curve_annot  <- estimateAbundance(bcr_results_igh_csf, group="STATUS", ci=0.95, nboot=200,min_n = 8)
r8<-plot(rank_curve_annot,main_title="Rank diversity B cell Status CSF",colors=b_status_colors)

panel4_rank_curves <- plot_grid(r7,r8,ncol = 2)
panel4_rank_curves

3. V/J usage T cells

vfamily <- countGenes(tcr_results_csf, gene="v_call", groups=c("SUBTYPE_ANNOTATION", "STATUS"), 
                     clone="clone_id", mode="family")

jfamily <- countGenes(tcr_results_csf, gene="j_call", groups=c("SUBTYPE_ANNOTATION", "STATUS"), 
                     clone="clone_id", mode="family")
head(vfamily, n=4)
## # A tibble: 4 × 5
## # Groups:   SUBTYPE_ANNOTATION, STATUS [3]
##   SUBTYPE_ANNOTATION STATUS  gene   clone_count clone_freq
##   <chr>              <chr>   <chr>        <int>      <dbl>
## 1 CD4 T cells        Disease TRBV16           1    0.00115
## 2 CD4 T cells        Disease TRBV21           1    0.00115
## 3 CD4 T cells        Healthy TRBV21           1    0.00152
## 4 CD8 T cells        Disease TRBV25           1    0.00870
# fig.height=8, fig.width=9| fig.height=4, fig.width=6 for workshop
g1 <- ggplot(vfamily, aes(x=gene, y=clone_freq)) +
    theme_bw() +
    ggtitle("Clonal V Usage T cells CSF") +
    theme(axis.text.x=element_text(angle=45, hjust=1, vjust=1)) +
    ylab("Percent of repertoire") +
    xlab("") +
    scale_color_brewer(palette="Set1") +
    geom_point(aes(color=STATUS), size=5, alpha=0.8) +
    facet_grid(. ~ SUBTYPE_ANNOTATION)

g2 <- ggplot(jfamily, aes(x=gene, y=clone_freq)) +
    theme_bw() +
    ggtitle("Clonal J Usage T cells CSF") +
    theme(axis.text.x=element_text(angle=45, hjust=1, vjust=1)) +
    ylab("Percent of repertoire") +
    xlab("") +
    scale_color_brewer(palette="Set1") +
    geom_point(aes(color=STATUS), size=5, alpha=0.8) +
    facet_grid(. ~ SUBTYPE_ANNOTATION)

plot_grid(g1,g2,nrow=2)

4. C usage B cells

bcr_results_igh_csf <- bcr_results_igh[bcr_results_igh$COMPARTMENT %in% "CSF",]
bcr_results_igh_pb  <- bcr_results_igh[bcr_results_igh$COMPARTMENT %in% "PB",]

cfamily_csf <- countGenes(bcr_results_igh_csf, gene="c_call", groups=c("SUBTYPE_ANNOTATION", "STATUS"), 
                     clone="clone_id", mode="family")

cfamily_pb <- countGenes(bcr_results_igh_pb, gene="c_call", groups=c("SUBTYPE_ANNOTATION", "STATUS"), 
                     clone="clone_id", mode="family")
## Warning in countGenes(bcr_results_igh_pb, gene = "c_call", groups =
## c("SUBTYPE_ANNOTATION", : NA(s) found in 1 row(s) of the c_call column and
## excluded from tabulation
head(cfamily_csf, n=4)
## # A tibble: 4 × 5
## # Groups:   SUBTYPE_ANNOTATION, STATUS [2]
##   SUBTYPE_ANNOTATION STATUS  gene  clone_count clone_freq
##   <chr>              <chr>   <chr>       <int>      <dbl>
## 1 Memory B-cells     Disease IGHD            1     0.0455
## 2 Memory B-cells     Disease IGHG2           1     0.0455
## 3 Memory B-cells     Healthy IGHG2           1     0.25  
## 4 Memory B-cells     Healthy IGHM            1     0.25
# fig.height=8 | fig.height=4 for workshop

g1 <- ggplot(cfamily_csf, aes(x=gene, y=clone_freq)) +
    theme_bw() +
    ggtitle("Clonal Subtype B cells CSF") +
    theme(axis.text.x=element_text(angle=45, hjust=1, vjust=1)) +
    ylab("Percent of repertoire") +
    xlab("") +
    scale_color_brewer(palette="Set1") +
    geom_point(aes(color=STATUS), size=5, alpha=0.8) +
    facet_grid(. ~ SUBTYPE_ANNOTATION)

g2 <- ggplot(cfamily_pb, aes(x=gene, y=clone_freq)) +
    theme_bw() +
    ggtitle("Clonal Subtype B cells PB") +
    theme(axis.text.x=element_text(angle=45, hjust=1, vjust=1)) +
    ylab("Percent of repertoire") +
    xlab("") +
    scale_color_brewer(palette="Set1") +
    geom_point(aes(color=STATUS), size=5, alpha=0.8) +
    facet_grid(. ~ SUBTYPE_ANNOTATION)

plot_grid(g1,g2,nrow=2)

5. B cell Alpha Diversity

-color themes

comp_colors  <- c("CSF"="blue", "PB"="tomato2")
comp_colors2 <- c("PB"="tomato2")

a. generate diversity results for B cell isotypes (CSF vs. PB)

set.seed(seed.number)

bcr_results_igh_igm  <- bcr_results_igh[bcr_results_igh$c_call %in% "IGHM",]
bcr_results_igh_iga1 <- bcr_results_igh[bcr_results_igh$c_call %in% "IGHA1",]
bcr_results_igh_iga2 <- bcr_results_igh[bcr_results_igh$c_call %in% "IGHA2",]
bcr_results_igh_igg1 <- bcr_results_igh[bcr_results_igh$c_call %in% "IGHG1",]
bcr_results_igh_igg2 <- bcr_results_igh[bcr_results_igh$c_call %in% "IGHG2",]
bcr_results_igh_igg3 <- bcr_results_igh[bcr_results_igh$c_call %in% "IGHG3",]

MIN_OBS <- 2
NBOOT   <- 200

a1_curve <- alphaDiversity(bcr_results_igh_igm, group="COMPARTMENT",min_q=0, max_q=4, step_q=0.1,ci=0.95, nboot=NBOOT,min_n=MIN_OBS)
a2_curve <- alphaDiversity(bcr_results_igh_iga1, group="COMPARTMENT",min_q=0, max_q=4, step_q=0.1,ci=0.95, nboot=NBOOT,min_n=MIN_OBS)
a3_curve <- alphaDiversity(bcr_results_igh_iga2, group="COMPARTMENT",min_q=0, max_q=4, step_q=0.1,ci=0.95, nboot=NBOOT,min_n=MIN_OBS)
## Warning in estimateAbundance(data, ci = 0.95, ...): Not all groups passed
## threshold min_n=2. Excluded: CSF
a4_curve <- alphaDiversity(bcr_results_igh_igg1, group="COMPARTMENT",min_q=0, max_q=4, step_q=0.1,ci=0.95, nboot=NBOOT,min_n=MIN_OBS)
a5_curve <- alphaDiversity(bcr_results_igh_igg2, group="COMPARTMENT",min_q=0, max_q=4, step_q=0.1,ci=0.95, nboot=NBOOT,min_n=MIN_OBS)
a6_curve <- alphaDiversity(bcr_results_igh_igg3, group="COMPARTMENT",min_q=0, max_q=4, step_q=0.1,ci=0.95, nboot=NBOOT,min_n=MIN_OBS)

table(bcr_results_igh$c_call)
## 
## IGHA1 IGHA2  IGHD IGHG1 IGHG2 IGHG3  IGHM 
##    39    18    20    45    12     8   448

b. plot diversity curves

a1<-plot(a1_curve,main_title="IgM alpha diversity",colors=comp_colors)

a2<-plot(a2_curve,main_title="IgA1 alpha diversity",colors=comp_colors2)

a3<-plot(a3_curve,main_title="IgA2 alpha diversity",colors=comp_colors2)

a4<-plot(a4_curve, main_title="IgG1 alpha diversity",colors=comp_colors)

a5<-plot(a5_curve, main_title="IgG2 alpha diversity",colors=comp_colors2)

a6<-plot(a6_curve, main_title="IgG3 alpha diversity",colors=comp_colors2)

#panel_ad <- plot_grid(a1,a2,a3,a4,a5,a6,nrow = 2)
panel_ad <- plot_grid(a1,a4,a3,nrow = 1)
panel_ad

6. T cell Alpha Diversity

-color themes

comp_colors  <- c("CSF"="blue", "PB"="tomato2")
comp_colors2 <- c("PB"="tomato2")

a. generate diversity results for T cell subtypes (CSF vs. PB)

set.seed(seed.number)

tcr_results_d      <- tcr_df[tcr_df$STATUS %in% "Disease",]
tcr_results_h      <- tcr_df[tcr_df$STATUS %in% "Healthy",]
tcr_results_d_cd4  <- tcr_results_d[tcr_results_d$SUBTYPE_ANNOTATION %in% "CD4 T cells",]
tcr_results_d_cd8  <- tcr_results_d[tcr_results_d$SUBTYPE_ANNOTATION %in% "CD8 T cells",]
tcr_results_h_cd4  <- tcr_results_h[tcr_results_h$SUBTYPE_ANNOTATION %in% "CD4 T cells",]
tcr_results_h_cd8  <- tcr_results_h[tcr_results_h$SUBTYPE_ANNOTATION %in% "CD8 T cells",]

MIN_OBS <- 2
NBOOT   <- 200

a7_curve <- alphaDiversity(tcr_results_d_cd4, group="COMPARTMENT",min_q=0, max_q=4, step_q=0.1,ci=0.95, nboot=NBOOT,min_n=MIN_OBS)
a8_curve <- alphaDiversity(tcr_results_d_cd8, group="COMPARTMENT",min_q=0, max_q=4, step_q=0.1,ci=0.95, nboot=NBOOT,min_n=MIN_OBS)
a9_curve <- alphaDiversity(tcr_results_h_cd4, group="COMPARTMENT",min_q=0, max_q=4, step_q=0.1,ci=0.95, nboot=NBOOT,min_n=MIN_OBS)
a10_curve <- alphaDiversity(tcr_results_h_cd8, group="COMPARTMENT",min_q=0, max_q=4, step_q=0.1,ci=0.95, nboot=NBOOT,min_n=MIN_OBS)

b. Disease T cell CSF/PB diversity

a7<-plot(a7_curve,main_title="CD4 Disease alpha diversity",colors=comp_colors)

a8<-plot(a8_curve,main_title="CD8 Disease alpha diversity",colors=comp_colors)

panel_tcr_ad <- plot_grid(a7,a8,nrow = 1)
panel_tcr_ad

c. Healthy T cell CSF/PB diversity

a9<-plot(a9_curve,main_title="CD4 Healthy alpha diversity",colors=comp_colors)

a10<-plot(a10_curve, main_title="CD8 Healthy alpha diversity",colors=comp_colors)

panel_tcr_ad2 <- plot_grid(a9,a10,nrow = 1)
panel_tcr_ad2

7. Mutational Profile of B cells

a. generate mutation counts

set.seed(seed.number)

# Calculate combined R and S mutation frequencies
bcr_csf_obs <- observedMutations(bcr_results_igh_csf, sequenceColumn="sequence_alignment",
                            germlineColumn="germline_alignment_d_mask",
                            regionDefinition=NULL,
                            frequency=TRUE, 
                            combine=TRUE,
                            nproc=4)

bcr_pb_obs <- observedMutations(bcr_results_igh_pb, sequenceColumn="sequence_alignment",
                            germlineColumn="germline_alignment_d_mask",
                            regionDefinition=NULL,
                            frequency=TRUE, 
                            combine=TRUE,
                            nproc=4)

b. plot clonotype mutations - median mutation freq

  • Far more mutations in PB when considering the scale
# CSF 
bcr_csf_mut_freq_clone <- bcr_csf_obs %>%
                    dplyr::group_by(clone_id) %>%
                    dplyr::summarize(median_mut_freq = median(mu_freq))

bcr_pb_mut_freq_clone <- bcr_pb_obs %>%
                    dplyr::group_by(clone_id) %>%
                    dplyr::summarize(median_mut_freq = median(mu_freq))

p_mut_csf <- ggplot(bcr_csf_mut_freq_clone, aes(median_mut_freq)) +
  geom_histogram(binwidth = 0.005) +
  theme_bw() + theme(axis.title = element_text(size = 18)) + ggtitle("CSF - Median clonotype mutations")

p_mut_pb <- ggplot(bcr_pb_mut_freq_clone, aes(median_mut_freq)) +
  geom_histogram(binwidth = 0.005) +
  theme_bw() + theme(axis.title = element_text(size = 18)) + ggtitle("PB - Median clonotype mutations")

plot_grid(p_mut_pb,p_mut_csf,ncol = 2)

c. boxplot across different sub-classes

# fig.height=6 | fig.height=3 for workshop

g1 <- ggplot(bcr_csf_obs, aes(x=c_call, y=mu_freq, fill=c_call)) +
    theme_bw() + ggtitle("BCR CSF Total mutations") +
    xlab("Isotype") + ylab("Mutation frequency") +
    scale_fill_brewer(palette="OrRd") + 
    geom_boxplot() + stat_compare_means(label = "p.signif", aes(group=c_call), hide.ns = TRUE)

g2 <- ggplot(bcr_pb_obs, aes(x=c_call, y=mu_freq, fill=c_call)) +
    theme_bw() + ggtitle("BCR PB Total mutations") +
    xlab("Isotype") + ylab("Mutation frequency") +
    scale_fill_brewer(palette="OrRd") + 
    geom_boxplot() + stat_compare_means(label = "p.signif", aes(group=c_call), hide.ns = TRUE)
grid.arrange(g1,g2,nrow=2)

8. Mutational hot spots Observed among all 5-mers - B cells

a. setup models

set.seed(seed.number)

bcr_results_igh_d <- bcr_results_igh[bcr_results_igh$STATUS %in% "Disease",]

clone_db      <- collapseClones(bcr_results_igh_d, nproc=1)
model_disease <- createTargetingModel(clone_db, model="rs", sequenceColumn="clonal_sequence", germlineColumn="clonal_germline")
## Warning in createMutabilityMatrix(db, sub_mat, model = model, sequenceColumn =
## sequenceColumn, : Insufficient number of mutations to infer some 5-mers. Filled
## with 0.

b. Plot Disease

m1 <- plotMutability(model_disease, nucleotides="A", style="hedgehog")

m2 <- plotMutability(model_disease, nucleotides="C", style="hedgehog")

m3 <- plotMutability(model_disease, nucleotides="G", style="hedgehog")

m4 <- plotMutability(model_disease, nucleotides="T", style="hedgehog")

9. Trees - B cells

Dowser

a. format clones

  • Need larger clonotypes across more samples to effectively use this tool
  • Trees are only made for one clonotype ID at a time
# No BCR clones of sufficient size for making trees with dowser
input_trees            <- bcr_df
input_trees$subject_id <- tstrsplit(input_trees$sample,"_")[[1]]
clones                 <-  formatClones(input_trees,minseq = 2, nproc = 4)
## Warning in processClones(clones, nproc = nproc, seq = seq_name, minseq =
## minseq): All clones have less than minseq = 2 sequences
print(clones)
## # A tibble: 0 × 2
## # Rowwise: 
## # ℹ 2 variables: clone_id <chr>, data <list>

b. build tree

  • Since our data doesn’t have one large BCR clonotype, lets generate an example tree with public data
# Follow tutorial instead

# Load example data object
data(ExampleClones)

# Pick the two largest clonotypes
ExampleClones = ExampleClones[1:2,]
plots = plotTrees(ExampleClones)

plots = plotTrees(ExampleClones, tips="c_call")

plots[[1]]